---
title: "Figure A.5"
output: 
---

# Figure A.5

# Who's to Blame? Postconflict Violence and Public Attitudes Towards Peace Agreements
# Wyer, Frank. 

#clear environment
```{r clear environment}
# Remove every object that ls() reports in the current environment so the
# analysis below starts from an empty workspace.
rm(list = ls())
```

# uncomment and set working directory to replication archive
# setwd("~/blame_replication")

Uncomment the lines below to install packages if necessary (ggpubfigs can also be installed from GitHub via devtools).

```{r install packages}
# install.packages("tidyverse")
# install.packages("readxl")
# install.packages("cowplot")
# install.packages("scales")
# install.packages("ggpubfigs")
# devtools::install_github("JLSteenwyk/ggpubfigs")
```


#load packages
```{r}
library(tidyverse)
library(readxl)
library(cowplot)
library(scales)
library(ggpubfigs)
```

#read in survey data
```{r}
# Load the survey data; the path is resolved relative to the working directory.
survey_clean <- read.csv(file = "survey_clean.csv")
```

#read in census population data
```{r}
# Load the census population workbook, skipping the first 11 rows of the sheet
# before reading the column names and data.
census_df <- read_xlsx(
  path = "Raw Data Files/census_data_DANE.xlsx",
  skip = 11
)
```

#clean census data
```{r clean census}
# Reshape the 2022 census rows to long format: one row per original row and
# gender/age column, with the population count in `pop`.
census_pop_22 <- census_df %>%
  # keep 2022 only and drop rows whose area type is the "Total" aggregate
  filter(AÑO == 2022, `ÁREA GEOGRÁFICA` != "Total") %>%
  # drop every column whose name contains "Total"
  dplyr::select(!contains("Total")) %>%
  # stack the per-gender, per-age columns into name/value pairs
  pivot_longer(
    cols = Hombres_0:`Mujeres_100 y más`,
    names_to = "demo",
    values_to = "pop"
  ) %>%
  # split names such as "Hombres_0" into gender and age
  separate_wider_delim(
    cols = demo,
    delim = "_",
    names = c("gender", "age")
  ) %>%
  rename(year = AÑO, areatype = `ÁREA GEOGRÁFICA`) %>%
  # "100 y más" becomes 100 so that age can be treated as a number
  mutate(age = as.numeric(str_remove(age, " y más"))) %>%
  filter(age > 17) # limit to adult population in 2022
```

#generate population categories to match survey categories
```{r}
# Recode age, area type and gender into numeric category codes.
census_pop_22 <- census_pop_22 %>%
  mutate(
    # Age bands: case_when() takes the first matching condition, so each
    # upper bound only needs to be tested once. Missing ages stay NA.
    age_cat = case_when(
      age < 26 ~ 1,
      age < 36 ~ 2,
      age < 46 ~ 3,
      age < 56 ~ 4,
      age < 66 ~ 5,
      age >= 66 ~ 6
    ),
    # Urban centre (1) versus rural periphery (2); other values become NA.
    area_cat = case_when(
      areatype == "Cabecera" ~ 1,
      areatype == "Centros Poblados y Rural Disperso" ~ 2
    ),
    # Male (1) versus female (2); other values become NA.
    sex_cat = case_when(
      gender == "Hombres" ~ 1,
      gender == "Mujeres" ~ 2
    )
  )
```

#aggregate to strata and calculate proportions in population
```{r}
# Total adult population across all census rows, stored as a plain unnamed
# number so that its name does not leak into columns derived from it.
totpop22 <- sum(census_pop_22$pop)

# Population count and population share of each area x age x sex stratum.
# .groups = "drop" returns an ungrouped tibble, so later steps do not inherit
# leftover grouping and summarise() does not emit a regrouping message.
census_pop_22 <- census_pop_22 %>%
  group_by(area_cat, age_cat, sex_cat) %>%
  summarise(
    stratapop = sum(pop),
    strataprop = stratapop / totpop22, # proportion of population in the stratum
    .groups = "drop"
  )
```

#summarise sex distribution in census and among survey respondents
```{r respondent sex data}
# Stack two summaries with identical columns: the share of each sex in the
# census strata, and the share of each sex among survey respondents.
sex_plot_data <- rbind(
  # census: stratum populations summed by sex, divided by the adult total
  census_pop_22 %>%
    group_by(sex_cat) %>%
    summarise(sex_prop = sum(stratapop) / totpop22) %>%
    mutate(
      sex_cat_verbose = ifelse(sex_cat == 1, "Male", "Female"),
      group = "Census"
    ),
  # survey: respondent count by sex, divided by the number of respondents
  survey_clean %>%
    group_by(sex_cat) %>%
    summarise(sex_prop = n() / nrow(survey_clean)) %>%
    mutate(
      sex_cat_verbose = ifelse(sex_cat == 1, "Male", "Female"),
      group = "Survey"
    )
)
```

#plot respondent and census sex distribution
```{r plot sex distributions}
# Stacked bars comparing the sex composition of the census and the survey:
# one bar per data source, filled by sex, with the y axis shown as percentages.
sex_plot <- ggplot(
  data = sex_plot_data,
  mapping = aes(x = group, y = sex_prop, fill = sex_cat_verbose)
) +
  geom_col() + # bar heights are taken directly from sex_prop
  scale_y_continuous(limits = c(0, 1), labels = percent_format()) +
  scale_fill_manual(name = "", values = c("#EE442F", "#63ACBE")) +
  # blank out the title, axis titles and legend title
  labs(title = "", x = "", y = "", fill = "") +
  theme_minimal() +
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
```

#summarise urban/rural distribution in census and among survey respondents
```{r}
# Stack two summaries with identical columns: the share of each area type in
# the census strata, and the share of each area type among survey respondents.
rural_plot_data <- rbind(
  # census: stratum populations summed by area type, divided by the adult total
  census_pop_22 %>%
    group_by(area_cat) %>%
    summarise(area_prop = sum(stratapop) / totpop22) %>%
    mutate(
      area_cat_verbose = ifelse(area_cat == 1, "Municipal Center", "Rural Periphery"),
      group = "Census"
    ),
  # survey: respondent count by area type, divided by the number of respondents
  survey_clean %>%
    group_by(area_cat) %>%
    summarise(area_prop = n() / nrow(survey_clean)) %>%
    mutate(
      area_cat_verbose = ifelse(area_cat == 1, "Municipal Center", "Rural Periphery"),
      group = "Survey"
    )
)
```

#plot respondent and census urban/rural distribution
```{r plot area distributions}
# Chunk labels must be unique within a document (knitr stops on a duplicate
# label), so this chunk is labelled after the urban/rural plot it builds.
#
# Stacked bars comparing the urban/rural composition of the census and the
# survey: one bar per data source, filled by area type, y axis in percent.
rural_plot <- ggplot(rural_plot_data, aes(y = area_prop, x = group, fill = area_cat_verbose)) +
  geom_bar(stat = "identity") + # bar heights are taken directly from area_prop
  scale_y_continuous(limits = c(0, 1), labels = percent_format()) +
  scale_fill_manual(name = "", values = c("#56B4E9", "#E69F00")) +
  # blank out the title, axis titles and legend title
  labs(
    title = "",
    x = "",
    y = "",
    fill = ""
  ) +
  theme_minimal() +
  theme(
    legend.position = "top",
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
```

#optionally combine plots 
```{r combined demographics plot}
# Place the sex and urban/rural comparison plots side by side in one figure.
demo_plot <- plot_grid(sex_plot, rural_plot)

# Print the combined figure so it appears in the knitted output; assigning it
# alone draws nothing.
demo_plot
```
